fix: Adding shape distinguishing to the engine cache #3154
Conversation
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_settings.py 2024-09-11 04:38:03.076285+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_settings.py 2024-09-11 04:38:21.932007+00:00
@@ -112,28 +112,31 @@
lazy_engine_init: bool = LAZY_ENGINE_INIT
cache_built_engines: bool = CACHE_BUILT_ENGINES
reuse_cached_engines: bool = REUSE_CACHED_ENGINES
-_SETTINGS_TO_BE_ENGINE_INVARIANT =(
+_SETTINGS_TO_BE_ENGINE_INVARIANT = (
"enabled_precisions",
"max_aux_streams",
"version_compatible",
"optimization_level",
"disable_tf32",
"sparse_weights",
"make_refittable",
"engine_capability",
- "hardware_compatible",)
+ "hardware_compatible",
+)
-def settings_are_compatible(set_a: CompilationSettings, set_b: CompilationSettings) -> Tuple[bool, Set[str]]:
+def settings_are_compatible(
+ set_a: CompilationSettings, set_b: CompilationSettings
+) -> Tuple[bool, Set[str]]:
incompatible_settings: Set[str] = set()
for field in _SETTINGS_TO_BE_ENGINE_INVARIANT:
- if getattr(set_a, field) != getattr(set_b, field):
- incompatible_settings.add(field)
+ if getattr(set_a, field) != getattr(set_b, field):
+ incompatible_settings.add(field)
if len(incompatible_settings) == 0:
return True, set()
else:
return False, incompatible_settings
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_engine_cache.py 2024-09-11 04:38:03.076285+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_engine_cache.py 2024-09-11 04:38:21.950622+00:00
@@ -12,16 +12,27 @@
from sympy.polys.matrices.dense import Sequence
import torch
from torch._inductor.codecache import FxGraphCachePickler, sha256_hash
from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
-from torch_tensorrt.dynamo._settings import CompilationSettings, _SETTINGS_TO_BE_ENGINE_INVARIANT
+from torch_tensorrt.dynamo._settings import (
+ CompilationSettings,
+ _SETTINGS_TO_BE_ENGINE_INVARIANT,
+)
from torch_tensorrt._Input import Input
_LOGGER: logging.Logger = logging.getLogger(__name__)
-UnpackedCacheHit = Tuple[bytes, List[str], List[str], Tuple[Input], CompilationSettings, Optional[Dict[Any, Any]]]
+UnpackedCacheHit = Tuple[
+ bytes,
+ List[str],
+ List[str],
+ Tuple[Input],
+ CompilationSettings,
+ Optional[Dict[Any, Any]],
+]
+
class BaseEngineCache(ABC):
@abstractmethod
def __init__(
@@ -30,11 +41,15 @@
**kwargs: Any,
) -> None:
pass
@staticmethod
- def get_hash(gm: torch.fx.GraphModule, input_specs: Sequence[Input], settings: CompilationSettings) -> str:
+ def get_hash(
+ gm: torch.fx.GraphModule,
+ input_specs: Sequence[Input],
+ settings: CompilationSettings,
+ ) -> str:
"""Get the hash value of the GraphModule
Args:
gm (torch.fx.GraphModule): GraphModule to hash
@@ -53,11 +68,13 @@
with io.BytesIO() as stream:
input_specs_data = pickle.dumps(input_spec_strs)
input_specs_data = pickletools.optimize(input_specs_data)
input_specs_hash = sha256_hash(input_specs_data)
- invariant_engine_specs = [str(getattr(settings, field)) for field in _SETTINGS_TO_BE_ENGINE_INVARIANT]
+ invariant_engine_specs = [
+ str(getattr(settings, field)) for field in _SETTINGS_TO_BE_ENGINE_INVARIANT
+ ]
with io.BytesIO() as stream:
engine_specs_data = pickle.dumps(invariant_engine_specs)
engine_specs_data = pickletools.optimize(engine_specs_data)
engine_specs_hash = sha256_hash(engine_specs_data)
@@ -87,12 +104,11 @@
bytes: packed blob
"""
settings = copy.deepcopy(compilation_settings)
settings.torch_executed_ops = {
- f"torch.ops.{op.__str__()}"
- for op in settings.torch_executed_ops
+ f"torch.ops.{op.__str__()}" for op in settings.torch_executed_ops
}
return pickle.dumps(
{
"serialized_engine": bytes(serialized_engine),
@@ -122,11 +138,13 @@
unpacked["input_specs"],
unpacked["compilation_settings"],
unpacked["weight_name_map"],
)
- def insert(self, hash: str, entry: UnpackedCacheHit, *args: Any, **kwargs: Any) -> None:
+ def insert(
+ self, hash: str, entry: UnpackedCacheHit, *args: Any, **kwargs: Any
+ ) -> None:
"""
Insert a cache entry into the engine cache.
Args:
hash (str): The hash value of the GraphModule.
@@ -137,11 +155,10 @@
Returns:
None
"""
packed_cache_info = BaseEngineCache.pack(*entry)
return self.save(hash, packed_cache_info, *args, **kwargs)
-
def check(self, hash: str, *args: Any, **kwargs: Any) -> Optional[UnpackedCacheHit]:
"""
Check if a cache entry exists for the given hash.
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-09-11 04:38:03.076285+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-09-11 04:38:22.587202+00:00
@@ -531,20 +531,35 @@
if self.engine_cache is not None:
if (
self.compilation_settings.cache_built_engines
or self.compilation_settings.reuse_cached_engines
):
- hash_val = self.engine_cache.get_hash(self.module, self.input_specs, self.compilation_settings)
+ hash_val = self.engine_cache.get_hash(
+ self.module, self.input_specs, self.compilation_settings
+ )
if self.compilation_settings.reuse_cached_engines:
# query the cached TRT engine
cached_data = self.engine_cache.check(hash_val)
- if cached_data is not None: # hit the cache
- (serialized_engine, self._input_names, self._output_names, engine_input_specs, engine_compilation_settings, self.weight_name_map) = cached_data
-
- setting_compatiblity, incompattible_settings = settings_are_compatible(self.compilation_settings, engine_compilation_settings)
- assert setting_compatiblity, f"Attempted to refit a prebuilt engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})"
+ if cached_data is not None: # hit the cache
+ (
+ serialized_engine,
+ self._input_names,
+ self._output_names,
+ engine_input_specs,
+ engine_compilation_settings,
+ self.weight_name_map,
+ ) = cached_data
+
+ setting_compatiblity, incompattible_settings = (
+ settings_are_compatible(
+ self.compilation_settings, engine_compilation_settings
+ )
+ )
+ assert (
+ setting_compatiblity
+ ), f"Attempted to refit a prebuilt engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})"
_LOGGER.info(
"Found the cached engine that corresponds to this graph. It is directly loaded."
)
@@ -609,18 +624,21 @@
)
if (
self.engine_cache is not None
and self.compilation_settings.cache_built_engines
):
- self.engine_cache.insert(hash_val, (
- serialized_engine,
- self._input_names,
- self._output_names,
- self.input_specs,
- self.compilation_settings,
- self.weight_name_map,
- ))
+ self.engine_cache.insert(
+ hash_val,
+ (
+ serialized_engine,
+ self._input_names,
+ self._output_names,
+ self.input_specs,
+ self.compilation_settings,
+ self.weight_name_map,
+ ),
+ )
with io.BytesIO() as engine_bytes:
engine_bytes.write(serialized_engine)
engine_str = engine_bytes.getvalue()
--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_engine_cache.py 2024-09-11 04:38:03.108285+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_engine_cache.py 2024-09-11 04:38:26.801121+00:00
@@ -16,11 +16,10 @@
from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
assertions = unittest.TestCase()
-
class MyEngineCache(BaseEngineCache):
def __init__(
self,
engine_cache_dir: str,
) -> None:
@@ -55,111 +54,125 @@
blob = f.read()
self.hashes[hash] += 1
return blob
return None
+
class TestHashFunction(TestCase):
def test_reexport_is_equal(self):
pyt_model = models.resnet18(pretrained=True).eval().to("cuda")
example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),)
batch = torch.export.Dim("batch", min=1, max=200)
exp_program1 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs1 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs1 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings1 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1)
exp_program2 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs2 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs2 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings2 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2)
self.assertEqual(hash1, hash2)
-
-
def test_input_shape_change_is_not_equal(self):
pyt_model = models.resnet18(pretrained=True).eval().to("cuda")
example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),)
batch = torch.export.Dim("batch", min=1, max=200)
exp_program1 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs1 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs1 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings1 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1)
exp_program2 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs2 = (torch_trt.Input(min_shape=(1, 3, 300, 300), opt_shape=(100, 3, 300, 300), max_shape=(200, 3, 300, 300)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs2 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 300, 300),
+ opt_shape=(100, 3, 300, 300),
+ max_shape=(200, 3, 300, 300),
+ ),
+ )
settings2 = CompilationSettings(
- make_refittable=True,
- cache_built_engines=True,
- reuse_cached_engines=True
+ make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2)
self.assertNotEqual(hash1, hash2)
-
def test_engine_settings_is_not_equal(self):
pyt_model = models.resnet18(pretrained=True).eval().to("cuda")
example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),)
batch = torch.export.Dim("batch", min=1, max=200)
exp_program1 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs1 = (torch_trt.Input(min_shape=(1, 3, 224, 224), opt_shape=(100, 3, 224, 224), max_shape=(200, 3, 224, 224)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs1 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 224, 224),
+ opt_shape=(100, 3, 224, 224),
+ max_shape=(200, 3, 224, 224),
+ ),
+ )
settings1 = CompilationSettings(
make_refittable=True,
cache_built_engines=True,
reuse_cached_engines=True,
- enabled_precisions={torch.float32}
+ enabled_precisions={torch.float32},
)
hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1)
exp_program2 = torch.export.export(
- pyt_model,
- args=example_inputs,
- dynamic_shapes={"x": {0: batch}}
- )
- input_specs2 = (torch_trt.Input(min_shape=(1, 3, 300, 300), opt_shape=(100, 3, 300, 300), max_shape=(200, 3, 300, 300)),)
+ pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}}
+ )
+ input_specs2 = (
+ torch_trt.Input(
+ min_shape=(1, 3, 300, 300),
+ opt_shape=(100, 3, 300, 300),
+ max_shape=(200, 3, 300, 300),
+ ),
+ )
settings2 = CompilationSettings(
make_refittable=True,
cache_built_engines=True,
reuse_cached_engines=True,
- enabled_precisions={torch.float32, torch.float16}
+ enabled_precisions={torch.float32, torch.float16},
)
hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2)
self.assertNotEqual(hash1, hash2)
@@ -207,11 +220,11 @@
debug=False,
min_block_size=1,
make_refittable=True,
cache_built_engines=cache_built_engines,
reuse_cached_engines=reuse_cached_engines,
- engine_cache_dir=engine_cache_dir
+ engine_cache_dir=engine_cache_dir,
)
end.record()
torch.cuda.synchronize()
torch._dynamo.reset()
times.append(start.elapsed_time(end))
@@ -288,12 +301,17 @@
assertions.assertTrue(
cos_sim > COSINE_THRESHOLD,
msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
)
- [assertions.assertTrue(count == 1, f"cache was not hit exactly once for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
-
+ [
+ assertions.assertTrue(
+ count == 1,
+ f"cache was not hit exactly once for entry ({h}, hit: {count})",
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
def test_dynamo_compile_change_input_shape(self):
"""Runs compilation 3 times, the cache should miss each time"""
model = models.resnet18(pretrained=True).eval().to("cuda")
# Mark the dim0 of inputs as dynamic
@@ -303,11 +321,11 @@
shutil.rmtree(engine_cache_dir)
custom_engine_cache = MyEngineCache(engine_cache_dir)
for i in range(3):
- inputs = (torch.rand((4*(i + 1), 3, 224, 224)).to("cuda"),)
+ inputs = (torch.rand((4 * (i + 1), 3, 224, 224)).to("cuda"),)
trt_gm = torch_trt.dynamo.compile(
torch.export.export(model, args=inputs),
inputs=inputs,
use_python_runtime=False,
enabled_precisions={torch.float},
@@ -316,11 +334,16 @@
make_refittable=True,
cache_built_engines=True,
reuse_cached_engines=True,
)
- [assertions.assertTrue(count == 0, f"Unintended cache hit for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
+ [
+ assertions.assertTrue(
+ count == 0, f"Unintended cache hit for entry ({h}, hit: {count})"
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
@pytest.mark.xfail
def test_torch_compile_with_default_disk_engine_cache(self):
# Custom Engine Cache
model = models.resnet18(pretrained=True).eval().to("cuda")
@@ -360,11 +383,11 @@
"make_refittable": True,
"cache_built_engines": cache_built_engines,
"reuse_cached_engines": reuse_cached_engines,
"engine_cache_dir": engine_cache_dir,
"engine_cache_size": 1 << 30, # 1GB
- "torch_executed_ops": {"torch.ops.aten.relu.default"}
+ "torch_executed_ops": {"torch.ops.aten.relu.default"},
},
)
results.append(compiled_model(*inputs)) # trigger the compilation
end.record()
torch.cuda.synchronize()
@@ -426,11 +449,11 @@
"min_block_size": 1,
"make_refittable": True,
"cache_built_engines": cache_built_engines,
"reuse_cached_engines": reuse_cached_engines,
"custom_engine_cache": custom_engine_cache,
- "torch_executed_ops": {"torch.ops.aten.relu.default"}
+ "torch_executed_ops": {"torch.ops.aten.relu.default"},
},
)
results.append(compiled_model(*inputs)) # trigger the compilation
end.record()
torch.cuda.synchronize()
@@ -447,13 +470,17 @@
assertions.assertTrue(
cos_sim > COSINE_THRESHOLD,
msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
)
- [assertions.assertTrue(count == 1, f"cache was not hit exactly once for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
-
-
+ [
+ assertions.assertTrue(
+ count == 1,
+ f"cache was not hit exactly once for entry ({h}, hit: {count})",
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
def test_torch_compile_change_input_shape(self):
# Custom Engine Cache
model = models.resnet18(pretrained=True).eval().to("cuda")
@@ -475,10 +502,15 @@
"min_block_size": 1,
"make_refittable": True,
"cache_built_engines": True,
"reuse_cached_engines": True,
"custom_engine_cache": custom_engine_cache,
- "torch_executed_ops": {"torch.ops.aten.relu.default"}
+ "torch_executed_ops": {"torch.ops.aten.relu.default"},
},
)
- [assertions.assertTrue(count == 0, f"Unintended cache hit for entry ({h}, hit: {count})") for h, count in custom_engine_cache.hashes.items()]
+ [
+ assertions.assertTrue(
+ count == 0, f"Unintended cache hit for entry ({h}, hit: {count})"
+ )
+ for h, count in custom_engine_cache.hashes.items()
+ ]
--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_models_export.py 2024-09-11 04:38:03.108285+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/models/test_models_export.py 2024-09-11 04:38:26.804639+00:00
@@ -196,11 +196,12 @@
@unittest.skipIf(
torch.cuda.get_device_properties(torch.cuda.current_device()).major < 9,
"FP8 compilation in Torch-TRT is not supported on cards older than Hopper",
)
@unittest.skipIf(
- not importlib.util.find_spec("modelopt"), reason="ModelOpt is necessary to run this test"
+ not importlib.util.find_spec("modelopt"),
+ reason="ModelOpt is necessary to run this test",
)
@pytest.mark.unit
def test_base_fp8(ir):
import modelopt
@@ -244,12 +245,14 @@
reuse_cached_engines=False,
)
outputs_trt = trt_model(input_tensor)
assert torch.allclose(output_pyt, outputs_trt, rtol=1e-3, atol=1e-2)
+
@unittest.skipIf(
- not importlib.util.find_spec("modelopt") or Version(importlib.metadata.version("modelopt")) < Version("0.16.1"),
+ not importlib.util.find_spec("modelopt")
+ or Version(importlib.metadata.version("modelopt")) < Version("0.16.1"),
"modelopt 0.16.1 or later is required Int8 quantization is supported in modelopt since 0.16.1 or later",
)
@pytest.mark.unit
def test_base_int8(ir):
import modelopt
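
For context on the change the lint report covers: the cache key is built by concatenating three digests — one over the graph, one over the stringified input specs, and one over the engine-invariant settings — so changing any of the three produces a different key and therefore a cache miss. The sketch below is a minimal, self-contained illustration of that composition using only the standard library; the names and the graph-hashing shortcut are illustrative, not torch_tensorrt's actual implementation (which hashes the GraphModule via FxGraphCachePickler).

import hashlib
import pickle
import pickletools

def sha256_hash(data: bytes) -> str:
    # Stand-in for torch._inductor.codecache.sha256_hash
    return hashlib.sha256(data).hexdigest()

def get_cache_key(graph_repr: str, input_spec_strs: list, invariant_specs: list) -> str:
    # Illustrative graph hash; the real code pickles the GraphModule itself.
    graph_hash = sha256_hash(graph_repr.encode())
    # Mirror the pickle -> pickletools.optimize -> sha256 steps from the diff
    # for the input shape specs and the engine-invariant settings.
    input_specs_hash = sha256_hash(pickletools.optimize(pickle.dumps(input_spec_strs)))
    engine_specs_hash = sha256_hash(pickletools.optimize(pickle.dumps(invariant_specs)))
    # Concatenation: a change to any component changes the overall key.
    return graph_hash + input_specs_hash + engine_specs_hash

key_224 = get_cache_key(
    "resnet18",
    ["min=(1,3,224,224) opt=(100,3,224,224) max=(200,3,224,224)"],
    ["enabled_precisions={torch.float32}"],
)
key_300 = get_cache_key(
    "resnet18",
    ["min=(1,3,300,300) opt=(100,3,300,300) max=(200,3,300,300)"],
    ["enabled_precisions={torch.float32}"],
)
assert key_224 != key_300  # different shape ranges -> cache miss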
Description
Adds distinguishing based on input shape and compilation settings to the engine cache
Fixes #3148
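
A condensed version of what the new hash tests above verify: hashing the same exported module with a different Input shape range yields a different cache key, so the engine cache misses instead of loading an engine built for other shapes. This sketch assumes a CUDA device and reuses the torch_tensorrt APIs exercised in the tests.

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models
from torch_tensorrt.dynamo._engine_cache import BaseEngineCache
from torch_tensorrt.dynamo._settings import CompilationSettings

model = models.resnet18(pretrained=True).eval().to("cuda")
exp_program = torch.export.export(
    model, args=(torch.randn((100, 3, 224, 224)).to("cuda"),)
)
settings = CompilationSettings(
    make_refittable=True, cache_built_engines=True, reuse_cached_engines=True
)
specs_224 = (
    torch_trt.Input(
        min_shape=(1, 3, 224, 224),
        opt_shape=(100, 3, 224, 224),
        max_shape=(200, 3, 224, 224),
    ),
)
specs_300 = (
    torch_trt.Input(
        min_shape=(1, 3, 300, 300),
        opt_shape=(100, 3, 300, 300),
        max_shape=(200, 3, 300, 300),
    ),
)

# Same module, different shape ranges: the keys must differ.
hash_224 = BaseEngineCache.get_hash(exp_program.module(), specs_224, settings)
hash_300 = BaseEngineCache.get_hash(exp_program.module(), specs_300, settings)
assert hash_224 != hash_300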